<?php
/**
 * Created by PhpStorm.
 * User: hp
 * Date: 2018/3/30
 * Time: 13:51
 */

include_once __DIR__ . '/vendor/autoload.php';
// Directory containing this script, always ending with a directory separator.
define('ROOT_PATH', __DIR__ . DIRECTORY_SEPARATOR);

// Output directory (with trailing separator) for the generated PHP data files.
$file_dir = ROOT_PATH . 'spider_province_city_county' . DIRECTORY_SEPARATOR;
// File for the raw [code, name] pairs; only referenced by the disabled caching code.
$data_chunk_export_file = "{$file_dir}data_chunk_export.php";

/**
 * Reduce an HTML fragment to trimmed plain text encoded as UTF-8.
 *
 * Input that is already valid UTF-8 is left as is. Anything else is treated
 * as ISO-8859-1 and converted byte by byte, which is what the deprecated
 * utf8_encode() did. Re-encoding valid UTF-8 (for example Chinese text)
 * would turn every multi-byte character into mojibake.
 *
 * @param string $html HTML fragment.
 * @return string Text with tags removed and surrounding whitespace trimmed.
 */
function format($html)
{
    $html = (string) $html;

    // With the /u modifier PCRE rejects subjects that are not valid UTF-8,
    // so an empty pattern works as a cheap validity check.
    if (preg_match('//u', $html) !== 1) {
        // ISO-8859-1 -> UTF-8: every byte >= 0x80 becomes a two-byte sequence.
        $html = preg_replace_callback(
            '/[\x80-\xFF]/',
            function ($match) {
                $byte = ord($match[0]);
                return chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
            },
            $html
        );
    }

    return trim(strip_tags($html));
}

/**
 * Write $data to $file_name as a PHP file that returns the value, so it can
 * be loaded back with `$data = include $file_name;`.
 *
 * The parent directory is created (recursively) when it does not exist yet.
 *
 * @param string $file_name Destination path.
 * @param mixed  $data      Any value var_export() can represent.
 * @return int|false Number of bytes written, or false on failure.
 */
function store_file($file_name, $data)
{
    $directory = dirname($file_name);
    // The second is_dir() covers another process creating the directory
    // between the first check and mkdir().
    if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
        return false;
    }

    return file_put_contents($file_name, "<?php\nreturn " . var_export($data, true) . ';');
}


// Source page to scrape. Presumably the published table of administrative
// division codes and names (the rest of the script treats the cells as
// code/name pairs) -- confirm against the page itself.
$file = 'http://www.mca.gov.cn/article/sj/tjbz/a/2018/201803/201803191002.html';

echo "开始从网页[$file]获取数据...\n";

// Load the remote HTML with phpQuery. The pq() call below presumably queries
// the document loaded here, so this call must come first -- TODO confirm
// against the phpQuery documentation.
phpQuery::newDocumentFileHTML($file);
// Text of every cell matching the selector. The selector depends on the
// page's generated markup (row height 19, cell class xl7012452) and will
// stop matching if that markup changes.
$data = pq('tr[height="19"]>td.xl7012452')->texts();
// Drop falsy entries (such as empty strings), then group what is left into
// consecutive pairs. NOTE(review): assumes cells alternate code, name.
$data_chunk = array_chunk(array_filter($data), 2);

// NOTE(review): printed unconditionally; nothing above verifies that the
// fetch succeeded or that the selector matched any cells.
echo "获取数据成功\n";

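// Disabled: optional local cache of the scraped rows. When enabled, these
// lines create the output directory, export $data_chunk to a PHP file and
// load it back from that file.
//if (!file_exists($file_dir)) mkdir($file_dir);
//if (store_file($data_chunk_export_file,$data_chunk)) echo '数据生成成功';
//$data_chunk = include_once $data_chunk_export_file;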

/**
 * Split a flat, ordered list of [code, name] rows into province, city and
 * county records.
 *
 * Rows are expected in document order: each province is followed by its
 * cities, each city by its counties. Classification looks at the code:
 *  - "PP0000" (two-character prefix followed by four zeros) is a province;
 *  - a code that shares the current province's prefix and continues with
 *    two digits and "00" is a city;
 *  - every non-province row whose prefix is 11, 12, 31 or 50 is recorded as
 *    a city (these look like the direct-administered municipalities, whose
 *    districts hang directly off the province-level entry);
 *  - everything else is a county attached to the most recent city.
 *
 * Rows that are not a complete [code, name] pair are skipped. A city or
 * county seen before any parent gets null as its parent code.
 *
 * @param array $data_chunk List of [code, name] pairs.
 * @return array Keys 'provinces' (code, name, code_prefix), 'cities'
 *               (code, name, province_code) and 'counties'
 *               (code, name, city_code), each a list of records.
 */
function classify_regions(array $data_chunk)
{
    $provinces = [];
    $cities = [];
    $counties = [];
    $current_province = null;
    $current_city = null;
    // Compared as strings: substr() yields strings and strict in_array()
    // avoids loose numeric matching.
    $municipality_prefixes = ['11', '12', '31', '50'];

    foreach ($data_chunk as $row) {
        // array_chunk() can leave a trailing one-element chunk.
        if (!is_array($row) || !isset($row[0], $row[1])) {
            continue;
        }

        $code = (string) $row[0];
        $prefix = substr($code, 0, 2);
        $rest = (string) substr($code, 2);

        // Plain string checks instead of building a regex from cell text,
        // which could contain regex metacharacters.
        if (strncmp($rest, '0000', 4) === 0) {
            $current_province = [
                'code' => $row[0],
                'name' => $row[1],
                'code_prefix' => $prefix,
            ];
            $provinces[] = $current_province;
            continue;
        }

        $in_current_province = $current_province !== null
            && $prefix === $current_province['code_prefix'];
        $is_city = ($in_current_province && preg_match('/^\d{2}00/', $rest) === 1)
            || in_array($prefix, $municipality_prefixes, true);

        if ($is_city) {
            $current_city = [
                'code' => $row[0],
                'name' => $row[1],
                'province_code' => $current_province !== null ? $current_province['code'] : null,
            ];
            $cities[] = $current_city;
            continue;
        }

        $counties[] = [
            'code' => $row[0],
            'name' => $row[1],
            'city_code' => $current_city !== null ? $current_city['code'] : null,
        ];
    }

    return [
        'provinces' => $provinces,
        'cities' => $cities,
        'counties' => $counties,
    ];
}

// Classify all provinces, cities and counties.
$regions = classify_regions(isset($data_chunk) && is_array($data_chunk) ? $data_chunk : []);
$provinces = $regions['provinces'];
$cities = $regions['cities'];
$counties = $regions['counties'];

// NOTE(review): 'cites.php' looks like a typo for 'cities.php'. The name is
// kept because other code may already load the file under this name.
$outputs = [
    'provinces.php' => $provinces,
    'cites.php' => $cities,
    'counties.php' => $counties,
];

$all_stored = true;
foreach ($outputs as $output_name => $records) {
    // As before, empty result sets are not written at all.
    if (!$records) {
        continue;
    }
    $output_path = $file_dir . $output_name;
    if (store_file($output_path, $records) === false) {
        $all_stored = false;
        echo "写入文件[$output_path]失败\n";
    }
}

// Report success only when no write failed.
if ($all_stored) {
    echo "获取所有省市县成功\n";
}

